library(dplyr)
library(plotly)
library(reshape2)
library(lubridate)
library(randomForestSRC)

One dimensional Partial Dependence Plot

days <- read.csv("day.csv")
hour <- read.csv("hour.csv")
days$dteday <- as_date(days$dteday)
days_select <- select(days, workingday, holiday, temp, hum, windspeed, cnt)
days_select$days_2011 <- int_length(interval(ymd("2011-01-01"), days$dteday)) / (3600*24)
days_select$Winter <- ifelse(days$season == 1, 1, 0)
days_select$Fall <- ifelse(days$season == 4, 1, 0)
days_select$Summer <- ifelse(days$season == 3, 1, 0)
days_select$Misty <- ifelse(days$weathersit == 2, 1, 0)
days_select$Rain <- ifelse(days$weathersit == 3 | days$weathersit == 4, 1, 0)
days_select$Temp <- days_select$temp * 47 - 8
days_select$Hum <- days_select$hum * 100
days_select$Windspeed <- days_select$windspeed * 67
rfsrc <- rfsrc(cnt~., data=days_select) # Creamos la función random forest
res <- select(days_select, days_2011, Temp, Hum, Windspeed, cnt)


rows <- nrow(days_select)
for(c in names(res)[1:4])
{
  for(i in 1:rows){
    resul <- days_select
    resul[[c]] <- days_select[[c]][i]
    pred <- predict(rfsrc, resul)$predicted
    res[[c]][i] <- sum(pred) / rows
  }
}
figure1 = ggplot(days_select, aes(x = Temp, y = res$Temp)) + ylim(0, 6000) + geom_line() + geom_rug(sides="b", alpha = 0.5) + labs(x = "Temperature")
figure2 = ggplot(days_select, aes(x = Hum, y = res$Hum)) + ylim(0, 6000) + geom_line() + geom_rug(sides="b", alpha = 0.5) + labs(x = "Humidity")
figure3 = ggplot(days_select, aes(x = Windspeed, y = res$Windspeed)) + ylim(0, 6000) + geom_line() + geom_rug(sides="b", alpha = 0.5) + labs(x = "Windspeed")
figure4 = ggplot(days_select, aes(x = days_2011, y = res$days_2011)) + ylim(0, 6000) + geom_line() + geom_rug(sides="b", alpha = 0.5) + labs(x = "Days since 2011", y = "Predictions")

subplot(figure1, figure2, figure3,figure4, titleX = TRUE, titleY = TRUE, shareX = FALSE, shareY = TRUE)

2. Bidimensional Partial Dependency Plot

selection <- sample_n(days_select, 40)
Temp <- selection$Temp
Hum <- selection$Hum
Temp_Hum <- inner_join(data.frame(Temp),data.frame(Hum), by=character())
Temp_Hum$p <- 0
for(i in 1:nrow(Temp_Hum)){
  resul <- days_select
  resul[["Temp"]] <- Temp_Hum[["Temp"]][i]
  resul[["Hum"]] <- Temp_Hum[["Hum"]][i]
  
  pred <- predict(rfsrc, resul)$predicted
  Temp_Hum[["p"]][i] <- sum(pred) / rows
}

figure5 = ggplot(Temp_Hum, aes(x = Temp, y = Hum, fill = p)) + geom_tile(width = 10, height = 15) + geom_rug(alpha = 0.5) + labs(x = "Temperature", y = "Humidity", fill = "Num bikes")
figure5

3. PDP to explain the price of a house

house_data <- read.csv("kc_house_data.csv")
selection <- sample_n(house_data, 1000)
selection <- select(selection, bedrooms, bathrooms, sqft_living, sqft_lot, floors, yr_built, price)
rfsrc1 <- rfsrc(price~., data=selection)
res <- select(selection, bedrooms, bathrooms, sqft_living, floors, price)
rows <- nrow(selection)
for(name in names(res)[1:4])
{
  for(i in 1:rows){
    resul <- selection
    resul[[name]] <- selection[[name]][i]
    predic <- predict(rfsrc1, resul)$predicted
    res[[name]][i] <- sum(predic) / rows
  }
}
figure6 = ggplot(selection, aes(x = bedrooms,y = res$bedrooms)) + geom_line() + geom_rug(sides = "b", alpha = 0.5) + labs(x = "Bedrooms",y = "Prediction")
figure7 = ggplot(selection,aes(x=bathrooms,y=res$bathrooms)) + geom_line() +  geom_rug(sides = "b", alpha = 0.5) + labs(x = "Bathrooms", y = "")
figure8 = ggplot(selection,aes(x=sqft_living,y=res$sqft_living)) + geom_line() +  geom_rug(sides = "b", alpha = 0.5) + labs(x = "Sqft Living", y = "")
figure9 = ggplot(selection,aes(x=floors,y=res$floors)) +   geom_line() + geom_rug(sides = "b", alpha = 0.5) + labs(x = "Floors", y = "")
subplot(figure6, figure7, figure8, figure9, titleX = TRUE, titleY = TRUE, shareX = FALSE, shareY = FALSE)